In [69]:

    
import pandas as pd



In [3]:

    
import ghgmosh as gm



In [174]:

    
reload(gm)









    Out[174]:





<module 'ghgmosh' from 'ghgmosh.py'>

get data from compustat



In [16]:

    
usa = pd.io.excel.read_excel("../CDPdata/s12_american.xlsx")
world = pd.io.excel.read_excel("../CDPdata/s12_world.xlsx")



In [5]:

    
world.columns









    Out[5]:





Index([u'Global Company Key', u'Industry Format', u'Data Format', u'Level of Consolidation - Company Annual Descriptor', u'Population Source', u'Data Year - Fiscal', u'Fiscal Year-end Month', u'Data Date', u'Assets - Total', u'Cost of Goods Sold', u'Property, Plant and Equipment - Total (Net)', u'Revenue - Total', u'Stockholders Equity - Parent', u'International Security Identification Number', u'Net Income (Loss) - Consolidated', u'Company Name', u'Active/Inactive Status Marker', u'Current ISO Country Code - Incorporation', u'Current ISO Country Code - Headquarters'], dtype='object')



In [6]:

    
usa.columns









    Out[6]:





Index([u'Global Company Key', u'Data Date', u'Data Year - Fiscal', u'Industry Format', u'Level of Consolidation - Company Annual Descriptor', u'Population Source', u'Data Format', u'Ticker Symbol', u'ISO Currency Code', u'Fiscal Year-end Month', u'Assets - Total', u'Cost of Goods Sold', u'Net Income (Loss)', u'Property, Plant and Equipment - Total (Net)', u'Revenue - Total', u'Stockholders Equity - Parent', u'Active/Inactive Status Marker'], dtype='object')



In [17]:

    
world.rename(columns={"International Security Identification Number":"ISIN", 
                      'Net Income (Loss) - Consolidated': "income"}, inplace=True)
world = world[world["ISIN"].isnull()==False]
world = world[world["Revenue - Total"].isnull()==False]
len(world["ISIN"].value_counts().index) # 900
# chinese companies didn't return data









    Out[17]:





900



In [18]:

    
usa.rename(columns={"Ticker Symbol":"Ticker",
                    'Net Income (Loss)':"income"}, inplace=True)
len(usa['Ticker'].value_counts().index) # 347









    Out[18]:





347



In [19]:

    
orginfos = pd.read_pickle("../CDPdata/orginfos.pkl")



In [20]:

    
orginfos["Ticker"] = orginfos["Ticker"].apply(lambda(x): str(x).split(" ")[0])
orginfos_byticker = orginfos.reset_index().set_index("Ticker")
orginfos_byticker = orginfos_byticker[orginfos_byticker["Country"]== "USA"]
orginfos_byisin = orginfos.reset_index().set_index("ISIN")



In [21]:

    
usa.set_index("Ticker", inplace=True)
world.set_index("ISIN", inplace=True)



In [22]:

    
usainfos = usa.join(orginfos_byticker[["Organisation"]])
worldinfos = world.join(orginfos_byisin[["Organisation"]])



In [23]:

    
len(usainfos["Organisation"].value_counts().index) # 347
len(worldinfos["Organisation"].value_counts().index) # 900 perfect









    Out[23]:





900



In [24]:

    
usainfos = usainfos.reset_index().set_index("Organisation")
worldinfos = worldinfos.reset_index().set_index("Organisation")



In [31]:

    
s12_financials = pd.concat([usainfos, worldinfos])
s12_financials.rename(columns={"Data Year - Fiscal":"year", "Revenue - Total":"Revenues", "Cost of Goods Sold":"COGS",
                               'Stockholders Equity - Parent': "Equity", 'Assets - Total': "Assets", "income": "Income",
                               'Property, Plant and Equipment - Total (Net)':"PPE"},inplace=True)
s12_financials = s12_financials.reset_index().set_index(["Organisation","year"])
s12_financials.to_pickle("../CDPdata/s12_financials.pkl")



In [27]:

    
s12_financials["ISO Currency Code"].value_counts() # all USD thank goodness









    Out[27]:





USD    2055
dtype: int64



In [70]:

    
# combine with s12_completed to compute intensities
s12_financials = pd.read_pickle("../CDPdata/s12_financials.pkl")
scopes12_c = pd.read_pickle("../CDPdata/s12_completed.pkl")



In [71]:

    
s12_ghgfins = scopes12_c[["Country", "GICS Industry",
                          "GICS Sector", "scope1", "scope2"]].join(s12_financials[fm.FINCOLS], how="inner")



In [72]:

    
s12_ghgfins["1and2 total"] = s12_ghgfins["scope1"]+ s12_ghgfins["scope2"]
s12_ghgfins["GICS Sector"].replace("Banks", "Financials", inplace=True)



In [36]:

    
import finmosh as fm



In [73]:

    
reload(fm)









    Out[73]:





<module 'finmosh' from 'finmosh.pyc'>



In [74]:

    
s = s12_ghgfins.reset_index()
s12_ghgfins = fm.adjust_inflation_all(s)



In [75]:

    
for cols in fm.FINCOLS:
    s12_ghgfins[cols].fillna(0, inplace=True)



In [76]:

    
s12_ghgfins["1and2 intensity"] = s12_ghgfins["1and2 total"]/s12_ghgfins["Revenues"]
s12_ghgfins = s12_ghgfins[s12_ghgfins["1and2 intensity"].isnull() == False]
s12_ghgfins.set_index(["Organisation", "year"], inplace=True)
s12_ghgfins.sort_index(inplace=True)



In [78]:

    
len(s12_ghgfins.index.levels[0].value_counts().index) #1243 instead of 1247 requested from COMPUSTAT
len(s12_ghgfins) # 4673









    Out[78]:





1243



In [79]:

    
# revs and cogs are in millions USD jan 2015
# intensity is in tons/million
s12_ghgfins.to_pickle("../CDPdata/s12_ghgfins.pkl")



In [80]:

    
s12_ghgfins.head()









    Out[80]:






  
    
      
      
      Country
      GICS Industry
      GICS Sector
      scope1
      scope2
      Revenues
      COGS
      Equity
      PPE
      Assets
      Income
      1and2 total
      1and2 intensity
    
    
      Organisation
      year
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      3M Company
      2009
       USA
       Industrial Conglomerates
       Industrials
       3290000
       1690000
       24939.230138
       11709.779078
       13766.567205
       7549.825324
       29390.391440
       3443.798894
       4980000
       199.685394
    
    
      2010
       USA
       Industrial Conglomerates
       Industrials
       4300000
       1950000
       28294.483474
       13489.279853
       16622.027404
       7724.684765
       32002.417059
       4335.119833
       6250000
       220.891115
    
    
      2011
       USA
       Industrial Conglomerates
       Industrials
       4060000
       2030000
       30530.950861
       14906.148276
       15899.066640
       7904.166334
       32598.241952
       4416.063711
       6090000
       199.469713
    
    
      2012
       USA
       Industrial Conglomerates
       Industrials
       4540000
       2230000
       30349.027827
       14611.254468
       17836.549092
       8502.680415
       34380.138666
       4510.135088
       6770000
       223.071396
    
    
      2013
       USA
       Industrial Conglomerates
       Industrials
       4550000
       2230000
       30843.417282
       14721.834526
       17486.362258
       8644.269584
       33520.023641
       4654.837262
       6780000
       219.820000



In [83]:

    
s = s12_ghgfins.reset_index()
s[s.duplicated(["Organisation", "year"])]









    Out[83]:






  
    
      
      Organisation
      year
      Country
      GICS Industry
      GICS Sector
      scope1
      scope2
      Revenues
      COGS
      Equity
      PPE
      Assets
      Income
      1and2 total
      1and2 intensity
    
  
  
    
      407 
       Associated British Foods
       2012
       United Kingdom
                  Food Products
       Consumer Staples
       2295328.00
       1067934.00
       12434.332830
        8906.603405
        5920.820905
        4608.578630
       10392.390481
        563.259445
       3363262.00
       270.481902
    
    
      1657
            Gold Fields Limited
       2009
         South Africa
                Metals & Mining
              Materials
       1308764.00
       5093511.00
       16698.811506
       10486.815230
           0.000000
           0.000000
           0.000000
       2605.552574
       6402275.00
       383.397046
    
    
      1659
            Gold Fields Limited
       2010
         South Africa
                Metals & Mining
              Materials
       1377194.00
       5164897.00
       19429.083823
       11535.451426
       46208.132505
       56510.223767
       76211.406666
        -80.971761
       6542091.00
       336.716392
    
    
      2614
          Massmart Holdings Ltd
       2012
         South Africa
       Food & Staples Retailing
       Consumer Staples
         25673.73
        298522.25
       36660.172304
       29962.560806
        4810.235661
        3925.766100
       23362.377969
        702.095287
        324195.98
         8.843275
    
    
      4210
             Tongaat Hulett Ltd
       2009
         South Africa
                  Food Products
       Consumer Staples
        787711.00
        309388.00
       12010.693544
        9820.165654
        4932.193030
        8315.593321
       14418.009276
       3125.627684
       1097099.00
        91.343518

graph histograms of raw data

it should vary the most by sector



In [11]:

    
import numpy as np
from collections import OrderedDict
from bokeh.charts import Scatter
from bokeh.charts import Histogram
from bokeh.plotting import output_notebook, show
output_notebook()









    




    
        
        
        
    
        
        BokehJS successfully loaded.



In [12]:

    
import datavis as dv



In [70]:

    
ghgfins_s = s12_ghgfins.reset_index().set_index(["GICS Sector","year"])
ghgfins_yr = s12_ghgfins.reset_index().set_index("year")



In [71]:

    
ghgfins_s.index.levels[0]









    Out[71]:





Index([u'Consumer Discretionary', u'Consumer Staples', u'Energy', u'Financials', u'Health Care', u'Industrials', u'Information Technology', u'Materials', u'Telecommunication Services', u'Utilities'], dtype='object')



In [140]:

    
sector = "Consumer Staples"
int_s = OrderedDict()
for yr in range(2009,2014):
    vs = np.array(ghgfins_s.loc[sector,yr]["1and2 intensity"].tolist())
    int_s[str(yr)] = vs[vs<500]



In [124]:

    
intensity = OrderedDict()
for yr in range(2009,2014):
    vs = np.array(ghgfins_yr.loc[yr]["1and2 intensity"].tolist())
    intensity[str(yr)] = vs[vs<300]



In [141]:

    
# histograms of intensities over time for each sector
title = "Scope 1 and 2 Intensity Consumer Staples"
fname = "12intyear_energy.html"
hist = Histogram(int_s, bins=20, filename=fname, title = title, legend="top_right",
                 ylabel="Proportion of Companies", xlabel="Intensity in tCO2e/$million")



In [142]:

    
show(hist)



In [ ]:

		Country	GICS Industry	GICS Sector	scope1	scope2	Revenues	COGS	Equity	PPE	Assets	Income	1and2 total	1and2 intensity
Organisation	year
3M Company	2009	USA	Industrial Conglomerates	Industrials	3290000	1690000	24939.230138	11709.779078	13766.567205	7549.825324	29390.391440	3443.798894	4980000	199.685394
	2010	USA	Industrial Conglomerates	Industrials	4300000	1950000	28294.483474	13489.279853	16622.027404	7724.684765	32002.417059	4335.119833	6250000	220.891115
	2011	USA	Industrial Conglomerates	Industrials	4060000	2030000	30530.950861	14906.148276	15899.066640	7904.166334	32598.241952	4416.063711	6090000	199.469713
	2012	USA	Industrial Conglomerates	Industrials	4540000	2230000	30349.027827	14611.254468	17836.549092	8502.680415	34380.138666	4510.135088	6770000	223.071396
	2013	USA	Industrial Conglomerates	Industrials	4550000	2230000	30843.417282	14721.834526	17486.362258	8644.269584	33520.023641	4654.837262	6780000	219.820000

	Organisation	year	Country	GICS Industry	GICS Sector	scope1	scope2	Revenues	COGS	Equity	PPE	Assets	Income	1and2 total	1and2 intensity
407	Associated British Foods	2012	United Kingdom	Food Products	Consumer Staples	2295328.00	1067934.00	12434.332830	8906.603405	5920.820905	4608.578630	10392.390481	563.259445	3363262.00	270.481902
1657	Gold Fields Limited	2009	South Africa	Metals & Mining	Materials	1308764.00	5093511.00	16698.811506	10486.815230	0.000000	0.000000	0.000000	2605.552574	6402275.00	383.397046
1659	Gold Fields Limited	2010	South Africa	Metals & Mining	Materials	1377194.00	5164897.00	19429.083823	11535.451426	46208.132505	56510.223767	76211.406666	-80.971761	6542091.00	336.716392
2614	Massmart Holdings Ltd	2012	South Africa	Food & Staples Retailing	Consumer Staples	25673.73	298522.25	36660.172304	29962.560806	4810.235661	3925.766100	23362.377969	702.095287	324195.98	8.843275
4210	Tongaat Hulett Ltd	2009	South Africa	Food Products	Consumer Staples	787711.00	309388.00	12010.693544	9820.165654	4932.193030	8315.593321	14418.009276	3125.627684	1097099.00	91.343518